RNN-EBM for Anomaly Detection in Time Series Data¶

In [ ]:
import os
import time

# Turn off TensorFlow's oneDNN custom operations (TF_ENABLE_ONEDNN_OPTS=0).
# The variable is set here, ahead of `import tensorflow`, because TensorFlow
# is expected to read it while the library is being loaded; setting it after
# the import is too late to influence the current process.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

# Project-local helpers (data loading, plotting, metrics, model utilities).
from dataset import *
from plots import *
from metrics import *
from models_funtions import *

# Set style for matplotlib
plt.style.use("Solarize_Light2")

# Default plotly renderer for figures shown in this notebook.
import plotly.io as pio
pio.renderers.default = "notebook_connected"
In [ ]:
import os

# Root directories of the dataset: one for the normal recordings and one for
# the recordings that contain collisions (the anomalies).
ROOTDIR_DATASET_NORMAL = "../dataset/normal"
ROOTDIR_DATASET_ANOMALY = "../dataset/collisions"

# TF_ENABLE_ONEDNN_OPTS=0 asks TensorFlow not to use the oneDNN library for
# optimization.
# NOTE(review): TensorFlow is expected to read this flag while it is being
# loaded, so it should only matter when it is set before the first
# `import tensorflow` of the process - confirm against the import order.
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

Various parameters¶

In [ ]:
# Frequency setting (kept as a string) that is forwarded to the data loaders
# and to the statistics helper. Alternatives left by the author:
# '1.0', '0.1', '0.005'.
freq = '0.01'

# File-name fragments identifying the normal and the collision recordings.
file_name_normal = "_20220811_rbtc_"
file_name_collisions = "_collision_20220811_rbtc_"

# Recording indices that belong to each group.
recording_normal = [0, 2, 3, 4]
recording_collisions = [1, 5]

# Same value as `freq` with every "." turned into "_", used in folder names.
freq_str = "_".join(freq.split("."))
features_folder_normal = "./features/normal" + freq_str + "/"
features_folder_collisions = "./features/collisions" + freq_str + "/"

Data¶

In [ ]:
# Normal recordings; the third returned value is discarded.
df_features_normal, df_normal_raw, _ = get_dataframes(
    ROOTDIR_DATASET_NORMAL,
    file_name_normal,
    recording_normal,
    freq,
    features_folder_normal,
)

# Collision recordings 1 and 5 together; features live in the "1_5/" sub-folder.
df_features_collisions, df_collisions_raw, df_collisions_raw_action = get_dataframes(
    ROOTDIR_DATASET_ANOMALY,
    file_name_collisions,
    recording_collisions,
    freq,
    features_folder_collisions + "1_5/",
)

# Collision recording 1 on its own.
df_features_collisions_1, df_collisions_raw_1, df_collisions_raw_action_1 = get_dataframes(
    ROOTDIR_DATASET_ANOMALY,
    file_name_collisions,
    [1],
    freq,
    features_folder_collisions + "1/",
)

# Collision recording 5 on its own.
df_features_collisions_5, df_collisions_raw_5, df_collisions_raw_action_5 = get_dataframes(
    ROOTDIR_DATASET_ANOMALY,
    file_name_collisions,
    [5],
    freq,
    features_folder_collisions + "5/",
)
Loading data.
Found 31 different actions.
Loading data done.

Loading features from file.
--- 0.023478269577026367 seconds ---
Loading data.
Found 31 different actions.
Loading data done.

Loading features from file.
--- 0.019075393676757812 seconds ---
Loading data.
Found 31 different actions.
Loading data done.

Loading features from file.
--- 0.013525724411010742 seconds ---
Loading data.
Found 31 different actions.
Loading data done.

Loading features from file.
--- 0.013577461242675781 seconds ---
In [ ]:
# Build one train/test split per collision feature set (both recordings,
# recording 1 only, recording 5 only). Every split is built from the same
# normal features with full_normal=True; the calls run in the listed order.
(
    (X_train, y_train, X_test, y_test, df_test),
    (X_train_1, y_train_1, X_test_1, y_test_1, df_test_1),
    (X_train_5, y_train_5, X_test_5, y_test_5, df_test_5),
) = (
    get_train_test_data(df_features_normal, collision_features, full_normal=True)
    for collision_features in (
        df_features_collisions,
        df_features_collisions_1,
        df_features_collisions_5,
    )
)
c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\base.py:493: UserWarning:

X does not have valid feature names, but VarianceThreshold was fitted with feature names

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\base.py:493: UserWarning:

X does not have valid feature names, but VarianceThreshold was fitted with feature names

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\sklearn\base.py:493: UserWarning:

X does not have valid feature names, but VarianceThreshold was fitted with feature names

Collisions¶

In [ ]:
# Load the collision annotations of recordings 1 and 5 (in that order).
(collisions_rec1, collisions_init1), (collisions_rec5, collisions_init5) = (
    get_collisions(recording_id, ROOTDIR_DATASET_ANOMALY)
    for recording_id in ('1', '5')
)

# Stack the annotations of both recordings, recording 1 first; the original
# index labels are kept as they are.
collisions_rec = pd.concat(objs=(collisions_rec1, collisions_rec5))
collisions_init = pd.concat(objs=(collisions_init1, collisions_init5))
In [ ]:
# Collision zones and labels for both recordings together ...
collisions_zones, y_collisions = get_collisions_zones_and_labels(
    collisions_rec,
    collisions_init,
    df_features_collisions,
)

# ... for recording 1 only ...
collisions_zones_1, y_collisions_1 = get_collisions_zones_and_labels(
    collisions_rec1,
    collisions_init1,
    df_features_collisions_1,
)

# ... and for recording 5 only.
collisions_zones_5, y_collisions_5 = get_collisions_zones_and_labels(
    collisions_rec5,
    collisions_init5,
    df_features_collisions_5,
)

RNN-EBM for Anomaly Detection in Time Series Data¶

In [ ]:
from algorithms.rnn_ebm import RecurrentEBM

# Switch TensorFlow to non-eager mode before the model is built.
# NOTE(review): presumably RecurrentEBM relies on TF1-style graph APIs - confirm.
tf.compat.v1.disable_eager_execution()

# Hyper-parameters of the recurrent energy-based model.
rnn_ebm_params = {
    "num_epochs": 100,
    "n_hidden": 64,
    "n_hidden_recurrent": 32,
    "min_lr": 1e-4,
    # Left as None for now; it is meant to be determined after training.
    "min_energy": None,
    "batch_size": 128,
    "seed": 42,
    # None selects the CPU; pass a GPU index instead if one is available.
    "gpu": None,
}
classifier = RecurrentEBM(**rnn_ebm_params)

# Fit the model on the training data built from the normal recordings.
classifier.fit(X_train)
print("RNN-EBM training completed.")
100%|██████████| 100/100 [00:14<00:00,  7.12it/s]
RNN-EBM training completed.

Predictions¶

In [ ]:
# Score each test set with the trained classifier using the "mad" threshold
# and replace the matching df_test* frame with the returned one. The calls run
# in the listed order: both recordings, recording 1, recording 5.
# NOTE(review): each df_test* is both an input and the output of its call, so
# re-running this cell feeds the previous results back in - re-run the
# train/test split cell first if a clean re-run is needed.
df_test, df_test_1, df_test_5 = (
    get_statistics(features, labels, classifier, frame, freq, threshold_type="mad")
    for features, labels, frame in (
        (X_test, y_collisions, df_test),
        (X_test_1, y_collisions_1, df_test_1),
        (X_test_5, y_collisions_5, df_test_5),
    )
)
Anomaly prediction completed.
Number of anomalies detected: 3 with threshold 24630.804321289062, std
Number of anomalies detected: 109 with threshold 135.32739639282227, mad
Number of anomalies detected: 16 with threshold 548.552001953125, percentile
Number of anomalies detected: 18 with threshold 507.74287700653076, IQR
Number of anomalies detected: 306 with threshold 0.0, zero

choosen threshold type: mad, with value: 135.3274
F1 Score: 0.8692
Accuracy: 0.9085
Precision: 0.8532
Recall: 0.8857
              precision    recall  f1-score   support

           0       0.94      0.92      0.93       201
           1       0.85      0.89      0.87       105

    accuracy                           0.91       306
   macro avg       0.90      0.90      0.90       306
weighted avg       0.91      0.91      0.91       306

ROC AUC Score: 0.9250
c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\numpy\lib\type_check.py:518: RuntimeWarning:

overflow encountered in cast

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\numpy\lib\type_check.py:519: RuntimeWarning:

overflow encountered in cast

No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Anomalies detected: 109
Best threshold: 113.2632 | F1 Score: 0.8947 | Precision: 0.8293 | Recall: 0.9714
Anomalies detected with best threshold: 123

	-------------------------------------------------------------------------------------

Anomaly prediction completed.
Number of anomalies detected: 1 with threshold 19121.05303955078, std
Number of anomalies detected: 44 with threshold 105.24900436401367, mad
Number of anomalies detected: 9 with threshold 371.66219024658193, percentile
Number of anomalies detected: 19 with threshold 185.85433959960938, IQR
Number of anomalies detected: 164 with threshold 0.0, zero

choosen threshold type: mad, with value: 105.2490
F1 Score: 0.8354
Accuracy: 0.9207
Precision: 0.7500
Recall: 0.9429
              precision    recall  f1-score   support

           0       0.98      0.91      0.95       129
           1       0.75      0.94      0.84        35

    accuracy                           0.92       164
   macro avg       0.87      0.93      0.89       164
weighted avg       0.93      0.92      0.92       164

ROC AUC Score: 0.9630
c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\src\models_funtions.py:67: RuntimeWarning:

invalid value encountered in divide

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\numpy\lib\type_check.py:518: RuntimeWarning:

overflow encountered in cast

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\numpy\lib\type_check.py:519: RuntimeWarning:

overflow encountered in cast

No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Anomalies detected: 44
Best threshold: 99.6220 | F1 Score: 0.8642 | Precision: 0.7609 | Recall: 1.0000
Anomalies detected with best threshold: 46

	-------------------------------------------------------------------------------------

Anomaly prediction completed.
Number of anomalies detected: 2 with threshold 29960.27783203125, std
Number of anomalies detected: 12 with threshold 495.915771484375, mad
Number of anomalies detected: 8 with threshold 574.9293212890625, percentile
Number of anomalies detected: 3 with threshold 666.8313217163086, IQR
Number of anomalies detected: 141 with threshold 0.0, zero

choosen threshold type: mad, with value: 495.9158
F1 Score: 0.0294
Accuracy: 0.5319
Precision: 0.0833
Recall: 0.0179
              precision    recall  f1-score   support

           0       0.57      0.87      0.69        85
           1       0.08      0.02      0.03        56

    accuracy                           0.53       141
   macro avg       0.33      0.44      0.36       141
weighted avg       0.38      0.53      0.43       141

ROC AUC Score: 0.8189
c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\src\models_funtions.py:67: RuntimeWarning:

invalid value encountered in divide

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\numpy\lib\type_check.py:518: RuntimeWarning:

overflow encountered in cast

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\numpy\lib\type_check.py:519: RuntimeWarning:

overflow encountered in cast

No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Anomalies detected: 12
Best threshold: 192.0957 | F1 Score: 0.8271 | Precision: 0.7143 | Recall: 0.9821
Anomalies detected with best threshold: 77

	-------------------------------------------------------------------------------------

c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\src\models_funtions.py:67: RuntimeWarning:

invalid value encountered in divide

In [ ]:
# True collision zones against the predicted ones, recordings 1 and 5 together.
plot_anomalies_true_and_predicted(
    df_collisions_raw,
    df_collisions_raw_action,
    collisions_zones,
    df_test,
    title="Collisions zones vs predicted zones for both recordings",
)
In [ ]:
# True collision zones against the predicted ones, recording 1 only.
plot_anomalies_true_and_predicted(
    df_collisions_raw_1,
    df_collisions_raw_action_1,
    collisions_zones_1,
    df_test_1,
    title="Collisions zones vs predicted zones for recording 1",
)
In [ ]:
# True collision zones against the predicted ones, recording 5 only.
plot_anomalies_true_and_predicted(
    df_collisions_raw_5,
    df_collisions_raw_action_5,
    collisions_zones_5,
    df_test_5,
    title="Collisions zones vs predicted zones for recording 5",
)